import os
import openai
import random
import numpy as np
import json
import jsonlines
import time
from tqdm import tqdm
from rank_bm25 import BM25Okapi
import threading

# SECURITY: API keys must never be committed to source control. The key is
# read from the OPENAI_API_KEY environment variable instead. Any key that was
# previously hard-coded in this file should be treated as compromised and
# revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

# Worked example of the four-step comparison (Initial Perception, Recognizing
# Incongruity, Contextual Analysis, Linking to the Question). The string is
# appended verbatim to the end of every prompt built in the script body, so
# any edit to its text changes what is sent to the model.
Example_prompt = '''
Here is the example, please follow this example.
Step-by-Step Analysis:
Step 1: Initial Perception
Option A: The scene is dynamic, involving action (a woman hitting an eight ball with a racket). Requires visualization of the action.
Option B: The scene is static, involving objects (a woman holding a tennis racket and a billiard ball). Instantly recognized without needing to visualize any action.
Which is better? Option B. It's simpler to process a static scene than to visualize an action.

Step 2: Recognizing Incongruity
Option A: Requires understanding that hitting an eight ball with a racket is unusual, needing knowledge of both tennis and billiards.
Option B: The incongruity of holding items from two different sports is immediately apparent, without needing specific sports knowledge.
Which is better? Option B. It presents a straightforward incongruity without requiring specialized knowledge.

Step 3: Contextual Analysis
Option A: One must consider the typical use of a racket (in tennis) and an eight ball (in billiards), and why combining them is odd.
Option B: There’s no need for a detailed contextual analysis; the oddity is self-evident due to the juxtaposition of unrelated sports equipment.
Which is better? Option B. It requires less background knowledge and less contextual analysis.

Step 4: Linking to the Question
Option A: The viewer must connect the action's oddity to the question's focus on strangeness.
Option B: The direct visual incongruity answers the question of strangeness without additional interpretation.
Which is better? Option B. It provides a more direct answer to the question with less interpretative effort.
'''


def ask_gpt4(question, thread_id, file_lock, line, unanswered_questions):
    """Send ``question`` to GPT-4 and append the answered record to the output file.

    On success the model's reply is stored in ``line['gpt4_rate']`` and
    ``line`` is appended as one JSON line to
    ``./gpt4_ans/whoops/anscot/test.jsonl`` while ``file_lock`` is held.

    Args:
        question: Prompt text, sent as a single user message.
        thread_id: Identifier of the calling worker; it is only stored next to
            the question when no answer could be obtained.
        file_lock: Lock that serialises appends to the shared output file.
        line: Dict describing the sample; mutated in place with the answer.
        unanswered_questions: List that receives ``(question, thread_id,
            line)`` whenever the request ends without an answer.

    Raises:
        Exception: If the API reports an error other than a rate limit or a
            timeout. The question is recorded in ``unanswered_questions``
            first, and the original error is chained as the cause.
    """
    messages = [{"role": "user", "content": question}]
    max_time = 20  # maximum number of attempts for transient failures

    for _ in range(max_time):
        # Keep the try body limited to the call that can raise API errors.
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4",
                max_tokens=1000,
                temperature=1.2,
                messages=messages,
            )
        except (openai.error.RateLimitError, openai.error.Timeout):
            # Transient failure (rate limit or timeout): pause, then retry.
            time.sleep(0.2)
            continue
        except openai.error.OpenAIError as err:
            # Record the question before propagating: this function runs as a
            # thread target, so the exception alone would only end the thread
            # and the sample would otherwise be lost without a trace.
            unanswered_questions.append((question, thread_id, line))
            raise Exception("Sorry, a problem happened") from err

        answer = response["choices"][0]["message"]["content"]
        with file_lock:
            with open('./gpt4_ans/whoops/anscot/test.jsonl', 'a',
                      encoding='utf-8') as outfile:
                line['gpt4_rate'] = answer
                outfile.write(json.dumps(line) + "\n")
        return

    # Every attempt hit a transient error; hand the question back to the caller.
    unanswered_questions.append((question, thread_id, line))
        

def read_jsonline(sample_file):
    """Return one "option A" string per record yielded by ``sample_file.iter()``.

    Each string is built from the first entry of the record's
    ``'crowd_underspecified_captions'`` value.
    """
    return [
        "The option A: %s" % (record['crowd_underspecified_captions'][0])
        for record in sample_file.iter()
    ]
        
def main():
    """Ask GPT-4 to compare each caption/explanation pair, one thread per pair.

    Captions are read from ``./data/whoops/random_icl/test.jsonl`` and
    explanations from ``./data/whoops/cb_icl/test.jsonl``; the two files are
    paired by position. Every pair becomes one prompt that is handed to
    ``ask_gpt4`` on its own thread. Questions reported as unanswered get one
    more attempt, and any that still fail are counted and reported.
    """
    # Context managers close both input files, even if reading fails.
    with jsonlines.open('./data/whoops/random_icl/test.jsonl') as caption_file:
        corpus = read_jsonline(caption_file)
    with jsonlines.open('./data/whoops/cb_icl/test.jsonl') as explanation_file:
        explanations = list(explanation_file.iter())

    # Loop-invariant parts of the prompt.
    start_prompt = '''Evaluate the equivalence of the following two captions for the question "Could you clarify why it seems to describe a scene that is considered strange or out of the ordinary?" '''
    last_prompt = ''' Please follow the same four step comparison method (Step 1: Initial Perception; Step 2: Recognizing Incongruity; Step 3: Contextual Analysis; Step 4: Linking to the Question) and analyze in each step which option is better.'''

    file_lock = threading.Lock()
    threads = []
    unanswered_questions = []
    # The total is taken from the data rather than a per-dataset constant.
    with tqdm(desc='Process', unit='it', total=len(explanations)) as pbar:
        for num, line in enumerate(explanations):
            option_a = corpus[num]
            option_b = '''The option B: %s''' % (line['explanations'][0])
            middle_prompt = '''%s; %s''' % (option_a, option_b)
            content = f'''{start_prompt}{middle_prompt}{last_prompt}{Example_prompt}'''
            thread = threading.Thread(
                target=ask_gpt4,
                args=(content, num + 1, file_lock, line, unanswered_questions),
            )
            threads.append(thread)
            thread.start()
            pbar.update()

    for thread in threads:
        thread.join()

    if not unanswered_questions:
        return

    # Second pass: failures are collected instead of being dropped into a
    # throw-away list, so they can be reported below.
    still_unanswered = []
    retry_threads = []
    for question, thread_id, line in unanswered_questions:
        retry_thread = threading.Thread(
            target=ask_gpt4,
            args=(question, thread_id, file_lock, line, still_unanswered),
        )
        retry_threads.append(retry_thread)
        retry_thread.start()

    for thread in retry_threads:
        thread.join()

    if still_unanswered:
        print(f"{len(still_unanswered)} question(s) remain unanswered after retry")


if __name__ == "__main__":
    main()
